#library(ArchR)
library(knitr)
library(tidyverse)
library(Seurat)
library(SeuratData)
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
library(scater)
library(zellkonverter)
library(SingleCellExperiment)
library(EnsDb.Mmusculus.v79)
library(Signac)
#library(SeuratDisk)
#library(caret)
#h5disableFileLocking()
We can read in the .h5ad file as a SummarizedCellExperiment
rna_gastr_SE <- readH5AD("anndata_rna.h5ad")
We can convert a Seurat object to a SummarizedCellExperiment
SCE <- as.SingleCellExperiment(gastr_seurat)
We can convert SummarizedCellExperiment to Seurat object
seurat <- as.Seurat(test, counts = "counts", data = "logcounts")
# read in the Count matrix as a dataframe
matrix = read.csv("count_matrix_gastr.csv", row.names = 1, sep = ",")
# convert dataframe to matrix
matrix <- as.matrix(matrix)
# read in metadata
obs = read.csv("anndata_as_csvs/obs.csv")
# read in pca coordinates
obsm = read.csv("anndata_as_csvs/obsm.csv")
# genome informations
var = read.csv("anndata_as_csvs/var.csv")
var %>% head
## X gene Accession Chromosome End Start Strand
## 1 Xkr4 Xkr4 ENSMUSG00000051951 1 3671498 3205901 -
## 2 Gm1992 Gm1992 ENSMUSG00000089699 1 3513553 3466587 +
## 3 Gm19938 Gm19938 ENSMUSG00000102331 1 3658904 3647309 -
## 4 Gm37381 Gm37381 ENSMUSG00000102343 1 3986215 3905739 -
## 5 Rp1 Rp1 ENSMUSG00000025900 1 4409241 3999557 -
## 6 Sox17 Sox17 ENSMUSG00000025902 1 4497354 4490931 -
## highly_variable means dispersions dispersions_norm mean
## 1 True 0.367476906 1.3238081 2.54351380 0.204760066
## 2 False 0.006202907 0.1363408 -0.04272088 0.004248043
## 3 False 0.036352255 0.2382920 0.23294678 0.024285307
## 4 False 0.002336403 0.8813052 1.97160080 0.001403798
## 5 True 0.019664977 1.4180168 3.42282410 0.010390339
## 6 True 0.154462862 0.4595927 0.83132523 0.103584691
## std
## 1 0.46847308
## 2 0.05438302
## 3 0.13469896
## 4 0.03449084
## 5 0.10144195
## 6 0.27630131
# pca coordinates
varm = read.csv("anndata_as_csvs/varm.csv")
# add cells and genes as row/column names respectively
colnames(matrix) <- var$gene
rownames(matrix) <- obs$cell
# Have a look at th ematrix
matrix[0:5, 0:5]
# conver to sparse matrix
matrix_sparse <- as(matrix, "sparseMatrix")
matrix_sparse[0:5, 0:5]
# we transpose the count matrix to genes x cells
#gastr_seurat <- CreateSeuratObject(counts = t(matrix_sparse), project = "mouse_gastrulation")
#saveRDS(gastr_seurat, "Seurat_rna_gastr/Seurat_object1")
gastr_seurat <- readRDS("Seurat_objects/rna_seurat1.rds")
# we can add the metadata columns to the seurat object
gastr_seurat <- AddMetaData(gastr_seurat, obs %>%
column_to_rownames("cell"))
gastr_seurat@meta.data %>% head
## orig.ident nCount_RNA nFeature_RNA sample
## E7.5_rep1#AAACAGCCAAACTCAT-1 E7.5 7538 2929 E7.5_rep1
## E7.5_rep1#AAACAGCCACAACCTA-1 E7.5 12134 4125 E7.5_rep1
## E7.5_rep1#AAACAGCCATCCTGAA-1 E7.5 6033 2676 E7.5_rep1
## E7.5_rep1#AAACAGCCATGCTATG-1 E7.5 11992 3984 E7.5_rep1
## E7.5_rep1#AAACATGCAACCTGGT-1 E7.5 11571 3702 E7.5_rep1
## E7.5_rep1#AAACATGCAATGAATG-1 E7.5 10923 4029 E7.5_rep1
## barcode stage mitochondrial_percent_RNA
## E7.5_rep1#AAACAGCCAAACTCAT-1 AAACAGCCAAACTCAT-1 E7.5 16.10507
## E7.5_rep1#AAACAGCCACAACCTA-1 AAACAGCCACAACCTA-1 E7.5 20.09230
## E7.5_rep1#AAACAGCCATCCTGAA-1 AAACAGCCATCCTGAA-1 E7.5 21.63103
## E7.5_rep1#AAACAGCCATGCTATG-1 AAACAGCCATGCTATG-1 E7.5 24.96664
## E7.5_rep1#AAACATGCAACCTGGT-1 AAACATGCAACCTGGT-1 E7.5 14.47584
## E7.5_rep1#AAACATGCAATGAATG-1 AAACATGCAATGAATG-1 E7.5 15.92969
## ribosomal_percent_RNA pass_rnaQC celltype.mapped
## E7.5_rep1#AAACAGCCAAACTCAT-1 12.072168 True Primitive_Streak
## E7.5_rep1#AAACAGCCACAACCTA-1 7.507829 True ExE_ectoderm
## E7.5_rep1#AAACAGCCATCCTGAA-1 6.364993 True Nascent_mesoderm
## E7.5_rep1#AAACAGCCATGCTATG-1 8.138759 True Epiblast
## E7.5_rep1#AAACATGCAACCTGGT-1 12.730101 True Parietal_endoderm
## E7.5_rep1#AAACATGCAATGAATG-1 5.319052 True Somitic_mesoderm
## celltype.score closest.cell hybrid_score
## E7.5_rep1#AAACAGCCAAACTCAT-1 0.76 cell_79749 0.18205486
## E7.5_rep1#AAACAGCCACAACCTA-1 1.00 cell_80539 0.03686339
## E7.5_rep1#AAACAGCCATCCTGAA-1 0.64 cell_82825 0.14067649
## E7.5_rep1#AAACAGCCATGCTATG-1 0.80 cell_84725 0.74835041
## E7.5_rep1#AAACATGCAACCTGGT-1 1.00 cell_4248 0.27021904
## E7.5_rep1#AAACATGCAATGAATG-1 0.76 cell_13439 0.30786430
## doublet_call TSSEnrichment_atac ReadsInTSS_atac
## E7.5_rep1#AAACAGCCAAACTCAT-1 False 12.369 406
## E7.5_rep1#AAACAGCCACAACCTA-1 False NA NA
## E7.5_rep1#AAACAGCCATCCTGAA-1 False 10.097 1596
## E7.5_rep1#AAACAGCCATGCTATG-1 False 9.157 2770
## E7.5_rep1#AAACATGCAACCTGGT-1 False 17.397 1722
## E7.5_rep1#AAACATGCAATGAATG-1 False 16.820 2149
## PromoterRatio_atac NucleosomeRatio_atac
## E7.5_rep1#AAACAGCCAAACTCAT-1 0.2428083 2.442157
## E7.5_rep1#AAACAGCCACAACCTA-1 NA NA
## E7.5_rep1#AAACAGCCATCCTGAA-1 0.1533246 1.437051
## E7.5_rep1#AAACAGCCATGCTATG-1 0.1664038 1.236268
## E7.5_rep1#AAACATGCAACCTGGT-1 0.2454128 2.435776
## E7.5_rep1#AAACATGCAATGAATG-1 0.2148524 2.196279
## nFrags_atac BlacklistRatio_atac pass_atacQC
## E7.5_rep1#AAACAGCCAAACTCAT-1 3511 0.07661635 False
## E7.5_rep1#AAACAGCCACAACCTA-1 NA NA False
## E7.5_rep1#AAACAGCCATCCTGAA-1 22048 0.01317580 True
## E7.5_rep1#AAACAGCCATGCTATG-1 37415 0.01683817 True
## E7.5_rep1#AAACATGCAACCTGGT-1 13080 0.04510703 True
## E7.5_rep1#AAACATGCAATGAATG-1 19411 0.01542940 True
## celltype.predicted sample_batch
## E7.5_rep1#AAACAGCCAAACTCAT-1 Primitive_Streak E7.5_rep1#-1
## E7.5_rep1#AAACAGCCACAACCTA-1 ExE_ectoderm E7.5_rep1#-1
## E7.5_rep1#AAACAGCCATCCTGAA-1 Nascent_mesoderm E7.5_rep1#-1
## E7.5_rep1#AAACAGCCATGCTATG-1 Epiblast E7.5_rep1#-1
## E7.5_rep1#AAACATGCAACCTGGT-1 Parietal_endoderm E7.5_rep1#-1
## E7.5_rep1#AAACATGCAATGAATG-1 Somitic_mesoderm E7.5_rep1#-1
## initial_size_spliced initial_size_unspliced
## E7.5_rep1#AAACAGCCAAACTCAT-1 6130 637
## E7.5_rep1#AAACAGCCACAACCTA-1 7147 3678
## E7.5_rep1#AAACAGCCATCCTGAA-1 3349 2018
## E7.5_rep1#AAACAGCCATGCTATG-1 7554 3098
## E7.5_rep1#AAACATGCAACCTGGT-1 7945 2200
## E7.5_rep1#AAACATGCAATGAATG-1 5340 4177
## initial_size leiden
## E7.5_rep1#AAACAGCCAAACTCAT-1 6130 0
## E7.5_rep1#AAACAGCCACAACCTA-1 7147 1
## E7.5_rep1#AAACAGCCATCCTGAA-1 3349 6
## E7.5_rep1#AAACAGCCATGCTATG-1 7554 0
## E7.5_rep1#AAACATGCAACCTGGT-1 7945 26
## E7.5_rep1#AAACATGCAATGAATG-1 5340 6
tibble(sample = gastr_seurat@meta.data$sample, umi_per_cell = colSums(gastr_seurat@assays$RNA@counts)) %>%
arrange(sample, desc(umi_per_cell)) %>%
group_by(sample) %>%
mutate(idx = seq_along(sample)) %>%
mutate(cum_umi_per_cell = cumsum(umi_per_cell)) %>%
ggplot() +
geom_line(aes(x = idx, y = cum_umi_per_cell)) +
facet_wrap(~sample, scales = "fixed") +
scale_y_log10() + scale_x_log10() +
ylab("cumulative UMI count") +
xlab("index")
variables <- c("nFeature_RNA", "nCount_RNA", "mitochondrial_percent_RNA", "ribosomal_percent_RNA")
map(variables, function(n){
df <- gastr_seurat@meta.data
ggplot(df) +
geom_boxplot(aes(x = df %>% pull("sample"), y = df %>% pull(n),
fill = df %>% pull("sample")), alpha = .1) +
geom_violin(aes(x = df %>% pull("sample"), y = df %>% pull(n),
fill = df %>% pull("sample")), alpha = .5) +
xlab("sample") +
ylab(paste0(n)) +
labs(title = paste0(n)) +
guides(fill=guide_legend(title="sample"))
})
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
The annotations are similar across the different samples. However, there are some differences in the confidence with which cells can me mapped for different celltypes. For example, cardiomyocytes, erythroids and extraembryonic ectoderm and endoderm (but not mesoderm), mesenchyme and parietal endoderm can be mapped with very high confidence.
gastr_seurat@meta.data %>%
ggplot() +
geom_histogram(aes(x = celltype.score)) +
facet_wrap(~sample, scales = "fixed")
#scale_y_log10()
gastr_seurat@meta.data %>%
ggplot() +
geom_boxplot(aes(x = celltype.mapped, y = celltype.score, fill = celltype.mapped)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.8, hjust=1)) + NoLegend()
gastr_seurat <- gastr_seurat %>%
NormalizeData(verbose = FALSE) %>%
ScaleData(verbose = FALSE) %>%
FindVariableFeatures(verbose = FALSE)
#gastr_seurat@assays$RNA@data[0:10, 0:10]
I will proceed with 15 PCs.
gastr_seurat <- RunPCA(gastr_seurat, features = VariableFeatures(gastr_seurat),
verbose = FALSE)
ElbowPlot(gastr_seurat)
pca_plots <- comprehenr::to_list(for (i in 1:15)
DimPlot(gastr_seurat, reduction = "pca", dims = i:(i+1)) +
theme())
#pca_plots
#gridExtra::grid.arrange(unlist(pca_plots), ncol = 3, nrow = 5)
gastr_seurat <- FindNeighbors(gastr_seurat, verbose = FALSE)
gastr_seurat <- FindClusters(gastr_seurat, verbose = FALSE, resolution = 0.9)
gastr_seurat <- RunUMAP(gastr_seurat, verbose = FALSE, dims = 1:15)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
colPalette_celltypes = c('#532C8A',
'#c19f70',
'#f9decf',
'#c9a997',
'#B51D8D',
'#3F84AA',
'#9e6762',
'#354E23',
'#F397C0',
'#ff891c',
'#635547',
'#C72228',
'#f79083',
'#EF4E22',
'#989898',
'#7F6874',
'#8870ad',
'#647a4f',
'#EF5A9D',
'#FBBE92',
'#139992',
'#cc7818',
'#DFCDE4',
'#8EC792',
'#C594BF',
'#C3C388',
'#0F4A9C',
'#FACB12',
'#8DB5CE',
'#1A1A1A',
'#C9EBFB',
'#DABE99',
'#65A83E',
'#005579',
'#CDE088',
'#f7f79e',
'#F6BFCB')
celltypes <- (gastr_seurat@meta.data %>% group_by(celltype.mapped) %>%
summarise(n = n()))$celltype.mapped
col <- setNames(colPalette_celltypes, celltypes)
DimPlot(gastr_seurat, reduction = "umap", pt.size = 1,
group.by = "celltype.mapped", label = TRUE, repel = TRUE, cols = col) +
NoLegend()
DimPlot(gastr_seurat, reduction = "umap", pt.size = 1,
group.by = "celltype.mapped", cols = col) #, label = TRUE, repel = TRUE) +
DimPlot(gastr_seurat, reduction = "umap", pt.size = .1,
group.by = "seurat_clusters", label = TRUE) +
NoLegend()
p1 <- DimPlot(gastr_seurat, reduction = "umap", pt.size = .1, group.by = "sample")
p2 <- FeaturePlot(gastr_seurat, reduction = "umap", pt.size = .1,
features = "nCount_RNA") +
scale_color_viridis_c()
p3 <- FeaturePlot(gastr_seurat, reduction = "umap", pt.size = .1, features = "mitochondrial_percent_RNA") +
scale_color_viridis_c()
p4 <- FeaturePlot(gastr_seurat, reduction = "umap", pt.size = .1, features = "nFeature_RNA") +
scale_color_viridis_c()
gridExtra::grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)
DimPlot(gastr_seurat, reduction = "umap", pt.size = 1,
group.by = "celltype.mapped", split.by = "orig.ident", ncol = 1, cols = col)
gastr_seurat@meta.data %>%
group_by(orig.ident, celltype.mapped) %>%
summarise(Total = n()) %>%
mutate(freq = Total/sum(Total)) %>%
ggplot(aes(x = celltype.mapped, y = freq, fill = orig.ident)) +
geom_bar(position = "dodge", stat = "identity") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.8, hjust=1)) +
facet_wrap(~orig.ident, ncol = 1)
We can see that nascent mesoderm, epiblast and primitive streak are only present at E75. Also, extraembryonice endoderm and ectoderm are highest and decrease with progressing gastrulation. Forebrain/Midbrain/Hindbrain, neural crest and neuromesodermal progenitor (NMP) cells are not present at E7.5, but emerge at E8.0 and are present at even higher percentage at E8.5. The same is true for Allantois, erythroids (produce red blood cells, remain in bone marrow)and cardiomyocytes.
(Pijuan_Sala et.al, A single-cell molecular map of mouse gastrulation and early organogenesis, 2019, Nature)
gastr_seurat@meta.data %>% ggplot() +
geom_bar(aes(x = orig.ident, fill = celltype.mapped), alpha = .6)
# plot the frequency of each cell type at each embryonic stage
# all frequencies for one embryonic stage would add up to 0
gastr_seurat@meta.data %>%
group_by(orig.ident, celltype.mapped) %>%
summarise(Total = n()) %>%
mutate(freq = Total/sum(Total)) %>%
ggplot(aes(x = celltype.mapped, y = freq, fill = orig.ident)) +
geom_bar(position = "dodge", stat = "identity") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.8, hjust=1))
subset(gastr_seurat, subset = orig.ident == "E7.5")
## An object of class Seurat
## 32245 features across 13068 samples within 1 assay
## Active assay: RNA (32245 features, 2000 variable features)
## 2 dimensional reductions calculated: pca, umap
stages <- c("E7.5", "E8.0", "E8.5")
seurat_objects <- map(stages, function(n){
gastr_subset <- subset(gastr_seurat, subset = orig.ident == n)
gastr_subset <- gastr_subset %>%
NormalizeData(verbose = FALSE) %>%
ScaleData(verbose = FALSE) %>%
FindVariableFeatures(verbose = FALSE)
gastr_subset <- gastr_subset %>% RunPCA(features = VariableFeatures(gastr_subset), verbose = FALSE) %>%
FindNeighbors(verbose = FALSE) %>%
FindClusters(verbose = FALSE, resolution = .9) %>%
RunUMAP(verbose = TRUE, dims = 1:15)
list(name = n, object = gastr_subset)
})
e75 <- seurat_objects[[1]]$object
e80 <- seurat_objects[[2]]$object
e85 <- seurat_objects[[3]]$object
umaps <- comprehenr::to_list(for (i in 1:3)
DimPlot(gastr_seurat, reduction = "pca", dims = i:(i+1)) +
theme())
plots <- map(seq.int(1,3), function(n){
object <- seurat_objects[[n]]$object
p1 <- DimPlot(object, reduction = "umap", group.by = "celltype.mapped",
cols = col, size = .1) +
labs(title = paste0(seurat_objects[[n]]$name, " celltypes"))
list(plot = p1)
})
gridExtra::grid.arrange(plots[[1]]$p, plots[[2]]$p, plots[[3]]$p, ncol = 1)
# read in atac anndata
atac_gastr <- readH5AD("anndata_atac.h5ad")
atac_gastr
## class: SingleCellExperiment
## dim: 203484 38158
## metadata(0):
## assays(1): X
## rownames(203484): chr1:3035578-3036178 chr1:3044896-3045496 ...
## chrX:169925470-169926070 chrX:169935781-169936381
## rowData names(5): chr start end score idx
## colnames(38158): E8.5_rep1#GTGGATGCACTAGCGT-1
## E8.5_rep1#CCGCTAGCATATTGAC-1 ... E8.0_rep2#CGCCTGTGTACTGAAT-1
## E8.0_rep2#TTGTGAGGTTTAAAGC-1
## colData names(34): BlacklistRatio nDiFrags ... ReadsInPeaks FRIP
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
# convert SingleCellExperiment to Seurat object
atac_seurat <- as.Seurat(atac_gastr, counts = "X", data = "X")
# adding metadata
atac_seurat <- AddMetaData(atac_seurat, metadata = as.data.frame(atac_gastr@colData))
# genome annotations from EnsDb
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79)
# change to UCSC style
seqlevelsStyle(annotations) <- "UCSC"
# add gene information to the seurat object
Annotation(atac_seurat) <- annotations
atac_seurat@meta.data %>%
ggplot() +
geom_density2d_filled(aes(x=log10(nFrags_atac ), y=TSSEnrichment_atac), bins=20) +
geom_hline(yintercept = 5, color="green", linetype="dashed") +
geom_vline(xintercept = 3, color="green", linetype="dashed") +
#geom_xsidedensity(aes(x=log10(pre_filter_meta$nFrags))) +
#geom_ysidedensity(aes(y = pre_filter_meta$TSSEnrichment)) +
facet_wrap(~sample) +
theme(legend.position = "none") +
labs(x = "Log10 Unique Fragments", y = "TSS Enrichment Score")
p1 <- atac_seurat@meta.data %>%
ggplot() +
ggridges::geom_density_ridges(aes(x = TSSEnrichment, y = Sample, fill = Sample),
alpha = .6)
p2 <- atac_seurat@meta.data %>%
ggplot() +
geom_violin(aes(x = Sample, y = TSSEnrichment, fill = Sample), alpha = 0.6) +
geom_boxplot(aes(x = Sample, y = TSSEnrichment,fill = Sample), alpha = 0.1) +
theme(legend.position = "none") +
labs(title = "TSS Enrichment")
cowplot::plot_grid(p2, p1, ncol = 2)
p1 <- atac_seurat@meta.data %>%
ggplot() +
ggridges::geom_density_ridges(aes(x = nFrags_atac , y = Sample, fill = Sample),
alpha = .6)
p2 <- atac_seurat@meta.data %>%
ggplot() +
geom_violin(aes(x = Sample, y = nFrags_atac , fill = Sample), alpha = 0.6) +
geom_boxplot(aes(x = Sample, y = nFrags_atac ,fill = Sample), alpha = 0.1) +
theme(legend.position = "none") +
labs(title = "TSS Enrichment")
cowplot::plot_grid(p2, p1, ncol = 2)
Latent Semantic Indexing
atac_seurat_test <- RunTFIDF(atac_seurat)
atac_seurat_test <- FindTopFeatures(atac_seurat_test)
atac_seurat_test <- RunSVD(atac_seurat_test)
The first LSI component often captures sequencing depth. We will therefore remove it from downstream analysis. The correlation between sequencing depth and each LSI component is shown in the plot below.
DepthCor(atac_seurat_test)
atac_seurat <- RunUMAP(atac_seurat_test, reduction = "lsi", dims = 2:30)
aatac_seurat <- FindNeighbors(atac_seurat, reduction = "lsi", dims = 2:30)
# for Clsutering instead of Louvian SLM algorithm is used
#atac_seurat <- FindClusters(atac_seurat, verbose = FALSE, algorithm = 3)
DimPlot(atac_seurat, group.by = "celltype.mapped", pt.size = 1, cols = col) +
labs(title = "scATAC-seq Celltype")
DimPlot(gastr_seurat , group.by = "celltype.mapped", pt.size = 1, cols = col,
reduction = "umap") +
labs(title = "scRNA-seq Celltype")
atac_seurat@meta.data %>%
ggplot(aes(x = log10(nCount_originalexp ), y = log10(nFrags_atac))) +
geom_point(alpha = .2, size = .2) +
ggside::geom_xsidedensity() +
ggside::geom_ysidedensity() +
facet_wrap(~sample) +
labs(x = "Log10 Counts", y = "log10 Unique Fragments")
atac_seurat@meta.data %>%
ggplot(aes(x = log10(nCount_originalexp), y = log10(TSSEnrichment))) +
geom_point(size = .2, alpha = .2) +
ggside::geom_xsidedensity() +
ggside::geom_ysidedensity() +
facet_wrap(~sample)
atac_seurat@meta.data %>%
ggplot() +
geom_histogram(aes(x = PromoterRatio_atac))
atac_seurat@meta.data %>%
ggplot() +
geom_histogram(aes(x = NucleosomeRatio_atac))
atac_seurat@meta.data %>%
mutate(col = case_when(
PassQC == 1 & pass_rnaQC == TRUE~ "Both",
PassQC == 1 & pass_rnaQC == TRUE ~ "Only ATAC",
PassQC == 0 & pass_rnaQC == TRUE ~ "Only RNA",
PassQC == 0 & pass_rnaQC == TRUE ~ "None"
)) %>%
ggplot(aes(x = log10(nCount_originalexp), y = log10(nFrags_atac))) +
geom_point(aes(color = col), size = 0.4, alpha = 0.4) +
scale_color_manual(values = c("Both" = "forestgreen",
"Only RNA" = "blue",
"Only ATAC" = "orange",
"None" = "grey"))
Why does this plot look different when using the RNA-seq dataset?
gastr_seurat@meta.data %>%
mutate(col = case_when(
pass_atacQC == "True" & pass_rnaQC == "True" ~ "Both",
pass_atacQC =="True" & pass_rnaQC == "False" ~ "Only ATAC",
pass_atacQC =="False" & pass_rnaQC == "True" ~ "Only RNA",
pass_atacQC =="False" & pass_rnaQC == "False" ~ "None"
)) %>%
ggplot(aes(x = log10(nCount_RNA), y = log10(nFrags_atac))) +
geom_point(aes(color = col), size = 0.4, alpha = 0.4) +
scale_color_manual(values = c("Both" = "forestgreen",
"Only RNA" = "blue",
"Only ATAC" = "orange",
"None" = "grey"))
## Warning: Removed 3210 rows containing missing values (geom_point).
gastr_seurat@meta.data %>%
dplyr::filter(pass_atacQC == "True") %>%
mutate(col = case_when(
pass_atacQC == "True" & pass_rnaQC == "True" ~ "Both",
pass_atacQC =="True" & pass_rnaQC == "False" ~ "Only ATAC",
pass_atacQC =="False" & pass_rnaQC == "True" ~ "Only RNA",
pass_atacQC =="False" & pass_rnaQC == "False" ~ "None"
)) %>%
ggplot(aes(x = log10(nCount_RNA), y = log10(nFrags_atac))) +
geom_point(aes(color = col), size = 0.4, alpha = 0.4) +
scale_color_manual(values = c("Both" = "forestgreen",
"Only RNA" = "blue",
"Only ATAC" = "orange",
"None" = "grey"))
We will remove all cells from the scRNA-seq dataset which do not pass the scATAC-seq QC and vice versa.
#rna_seurat_filt <- subset(gastr_seurat, pass_atacQC == "True")
# atac_seurat_filt <- subset(atac_seurat, pass_rnaQC == TRUE)